# analysis_section_4_2_part2.R
# This script replicates the analysis in Section 4.2 (Part 2) of the paper
#   Figure 4, Table 1, and Table SI.7
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata

# Load libraries:
library(did)
library(dplyr)
library(ggplot2)
library(patchwork)
library(sandwich)
library(tidyr)

# Load data:
load("./Replication Package/Data/VR_VoteCal_LA.Rdata")

# Convert data from wide to long format for analysis
VR_VoteCal_LA_long <- VR_VoteCal_LA %>%
  filter(RegisteredBefore2016Pri == 1, !is.na(Dist)) %>%
  mutate(Row_No = row_number(),
         Age_18_to_29 = if_else(Age >= 18 & Age <= 29, 1, 0),
         Age_30_to_44 = if_else(Age >= 30 & Age <= 44, 1, 0),
         Age_45_to_64 = if_else(Age >= 45 & Age <= 64, 1, 0),
         Age_65_or_older = if_else(Age >= 65, 1, 0),
         Male = if_else(gender_inf < 0.5, 1, 0),
         Female = if_else(gender_inf >= 0.5, 1, 0),
         White = if_else(pred.whi >= pred.whi & pred.whi > pred.bla & pred.whi > pred.his & pred.whi > pred.asi & pred.whi > pred.oth, 1, 0),
         Black = if_else(pred.bla > pred.whi & pred.bla >= pred.bla & pred.bla > pred.his & pred.bla > pred.asi & pred.bla > pred.oth, 1, 0),
         Hispanic = if_else(pred.his > pred.whi & pred.his > pred.bla & pred.his >= pred.his & pred.his > pred.asi & pred.his > pred.oth, 1, 0),
         Asian = if_else(pred.asi > pred.whi & pred.asi > pred.bla & pred.asi > pred.his & pred.asi >= pred.asi & pred.asi > pred.oth, 1, 0),
         Other = if_else(pred.oth > pred.whi & pred.oth > pred.bla & pred.oth > pred.his & pred.oth > pred.asi & pred.oth >= pred.oth, 1, 0)) %>%
  select(Row_No, Dist, UVBM, IsPermanentVBM, Turnout.2020, Turnout.2016,
         Age_18_to_29, Age_30_to_44, Age_45_to_64, Age_65_or_older, Male, Female,
         White, Black, Hispanic, Asian, Other,
         Perc_bachelor_or_more, median_hh_income, median_value) %>%
  pivot_longer(cols = c("Turnout.2020", "Turnout.2016"),
               names_to = c(".value", "Year"),
               names_sep = "[.]") %>%
  mutate(Pri2020 = if_else(Year == 2020, 1, 0),
         median_hh_income_in_100K = median_hh_income/100000,
         median_value_in_1m = median_value/1000000,
         Period = case_when(Year == 2016 ~ 1,
                            Year == 2020 ~ 2,
                            TRUE ~ NA_real_),
         Group = case_when(UVBM == 1 ~ 2,
                           UVBM == 0 ~ 0,
                           TRUE ~ NA_real_))

#*******************************************************************************
# Difference-in-Differences Analysis with Covariate-Specific Fixed Effects
#   Results Reported in Table SI.7
#*******************************************************************************

# DiD for Non-Permanent VBM Voters
# With No Covariate-Specific Fixed Effects
DiD_NPAV <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# With Demographic Fixed Effects
DiD_NPAV_demo <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# With Demographic and Socioeconomic Fixed Effects
DiD_NPAV_demo_socioecon <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other +
    Perc_bachelor_or_more + median_hh_income_in_100K + median_value_in_1m,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# DiD for Permanent VBM Voters
# With No Covariate-Specific Fixed Effects
DiD_PAV <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Perm. VBM")
)

# With Demographic Fixed Effects
DiD_PAV_demo <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Perm. VBM")
)

# With Demographic and Socioeconomic Fixed Effects
DiD_PAV_demo_socioecon <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other +
    Perc_bachelor_or_more + median_hh_income_in_100K + median_value_in_1m,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Perm. VBM")
)

#*******************************************************************************
# Table SI.7
#   Difference-in-Differences with Covariate-Specific Fixed Effects
#*******************************************************************************

# This function formats the model outputs for Table SI.7
# Extract Coefficient Estimates and Clustered Standard Errors
# Present Clustered Standard Errors below Coefficient Estimates
format_model_outputs <- function(model){
  model_outputs <- data.frame(
    term = attributes(model$coefficients)$names,
    coef = round(as.numeric(model$coefficients), 1),
    se = round(sqrt(diag(vcovCL(model, cluster = ~ Dist))), 2)
  ) %>%
    select(term, coef, se) %>%
    pivot_longer(cols = c(coef, se), names_to = "coef_or_se", values_to = "value")
  return(model_outputs)
}

# Full Table
Table_SI7 <- format_model_outputs(DiD_NPAV_demo_socioecon) %>%
  select(term, coef_or_se) %>%
  left_join(format_model_outputs(DiD_NPAV), by = c("term", "coef_or_se")) %>%
  left_join(format_model_outputs(DiD_NPAV_demo), by = c("term", "coef_or_se"), suffix = c("", "_2")) %>%
  left_join(format_model_outputs(DiD_NPAV_demo_socioecon), by = c("term", "coef_or_se"), suffix = c("", "_3")) %>%
  left_join(format_model_outputs(DiD_PAV), by = c("term", "coef_or_se"), suffix = c("", "_4")) %>%
  left_join(format_model_outputs(DiD_PAV_demo), by = c("term", "coef_or_se"), suffix = c("", "_5")) %>%
  left_join(format_model_outputs(DiD_PAV_demo_socioecon), by = c("term", "coef_or_se"), suffix = c("", "_6")) %>%
  arrange(is.na(value), is.na(value_2), is.na(value_3)) %>%
  mutate(value = round(value, 2),
         value_2 = round(value_2, 2),
         value_3 = round(value_3, 2),
         value_4 = round(value_4, 2),
         value_5 = round(value_5, 2),
         value_6 = round(value_6, 2)) %>%
  add_row(term = "Observations",
          value = nobs(DiD_NPAV),
          value_2 = nobs(DiD_NPAV_demo),
          value_3 = nobs(DiD_NPAV_demo_socioecon),
          value_4 = nobs(DiD_PAV),
          value_5 = nobs(DiD_PAV_demo),
          value_6 = nobs(DiD_PAV_demo_socioecon)) %>%
  add_row(term = "Adjusted R-squared",
          value = summary(DiD_NPAV)$adj.r.squared,
          value_2 = summary(DiD_NPAV_demo)$adj.r.squared,
          value_3 = summary(DiD_NPAV_demo_socioecon)$adj.r.squared,
          value_4 = summary(DiD_PAV)$adj.r.squared,
          value_5 = summary(DiD_PAV_demo)$adj.r.squared,
          value_6 = summary(DiD_PAV_demo_socioecon)$adj.r.squared) %>%
  rename(NPAV_Model1 = value, NPAV_Model2 = value_2, NPAV_Model3 = value_3,
         PAV_Model1 = value_4, PAV_Model2 = value_5, PAV_Model3 = value_6)

# Save Table
write.csv(Table_SI7, "./Replication Package/Table SI.7.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Difference-in-Differences Analysis by Demographic and Socioeconomic Characteristics
#   Results Reported in Figure 4
#*******************************************************************************

#*************************
# Age
#*************************

# Heterogeneous Effects by Age
DiD_NPAV_age <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Age_30_to_44 + Age_45_to_64 + Age_65_or_older +
    Age_30_to_44*UVBM + Age_45_to_64*UVBM + Age_65_or_older*UVBM +
    Age_30_to_44*Pri2020 + Age_45_to_64*Pri2020 + Age_65_or_older*Pri2020 +
    Age_30_to_44*UVBM*Pri2020 + Age_45_to_64*UVBM*Pri2020 + Age_65_or_older*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Age
vcov_age <- vcovCL(DiD_NPAV_age, cluster = ~ Dist)

vector_Age_18_to_29 <- replace(rep(0, length(DiD_NPAV_age$coefficients)), c(7), 1)
vector_Age_30_to_44 <- replace(rep(0, length(DiD_NPAV_age$coefficients)), c(7, 14), 1)
vector_Age_45_to_64 <- replace(rep(0, length(DiD_NPAV_age$coefficients)), c(7, 15), 1)
vector_Age_65_or_older <- replace(rep(0, length(DiD_NPAV_age$coefficients)), c(7, 16), 1)

est_Age_18_to_29 <- sum(vector_Age_18_to_29*DiD_NPAV_age$coefficients)
est_Age_30_to_44 <- sum(vector_Age_30_to_44*DiD_NPAV_age$coefficients)
est_Age_45_to_64 <- sum(vector_Age_45_to_64*DiD_NPAV_age$coefficients)
est_Age_65_or_older <- sum(vector_Age_65_or_older*DiD_NPAV_age$coefficients)

se_Age_18_to_29 <- sqrt(diag(t(vector_Age_18_to_29) %*% vcov_age %*% vector_Age_18_to_29))
se_Age_30_to_44 <- sqrt(diag(t(vector_Age_30_to_44) %*% vcov_age %*% vector_Age_30_to_44))
se_Age_45_to_64 <- sqrt(diag(t(vector_Age_45_to_64) %*% vcov_age %*% vector_Age_45_to_64))
se_Age_65_or_older <- sqrt(diag(t(vector_Age_65_or_older) %*% vcov_age %*% vector_Age_65_or_older))

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
results_DiD_NPAV_age <-
  data.frame(
    group = c("18-29", "30-44", "45-64", "65+"),
    estimate = c(est_Age_18_to_29, est_Age_30_to_44, est_Age_45_to_64, est_Age_65_or_older),
    se = c(se_Age_18_to_29, se_Age_30_to_44, se_Age_45_to_64, se_Age_65_or_older)
  ) %>%
  mutate(
    lower95 = estimate + qnorm(0.025)*se,
    upper95 = estimate + qnorm(0.975)*se
  )

#*************************
# Gender
#*************************

# Heterogeneous Effects by Gender
DiD_NPAV_gender <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Female + Female*UVBM + Female*Pri2020 + Female*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Gender
vcov_gender <- vcovCL(DiD_NPAV_gender, cluster = ~ Dist)

vector_Male <- replace(rep(0, length(DiD_NPAV_gender$coefficients)), c(5), 1)
vector_Female <- replace(rep(0, length(DiD_NPAV_gender$coefficients)), c(5, 8), 1)

est_Male <- sum(vector_Male*DiD_NPAV_gender$coefficients)
est_Female <- sum(vector_Female*DiD_NPAV_gender$coefficients)

se_Male <- sqrt(diag(t(vector_Male) %*% vcov_gender %*% vector_Male))
se_Female <- sqrt(diag(t(vector_Female) %*% vcov_gender %*% vector_Female))

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
results_DiD_NPAV_gender <-
  data.frame(
    group = c("Male", "Female"),
    estimate = c(est_Male, est_Female),
    se = c(se_Male, se_Female)
  ) %>%
  mutate(
    lower95 = estimate + qnorm(0.025)*se,
    upper95 = estimate + qnorm(0.975)*se
  )

#*************************
# Race/Ethnicity
#*************************

# Heterogeneous Effects by Race/Ethnicity
DiD_NPAV_race <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Black + Hispanic + Asian + Other +
    Black*UVBM + Hispanic*UVBM + Asian*UVBM + Other*UVBM +
    Black*Pri2020 + Hispanic*Pri2020 + Asian*Pri2020 + Other*Pri2020 +
    Black*UVBM*Pri2020 + Hispanic*UVBM*Pri2020 + Asian*UVBM*Pri2020 + Other*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Race/Ethnicity
vcov_race <- vcovCL(DiD_NPAV_race, cluster = ~ Dist)

vector_White <- replace(rep(0, length(DiD_NPAV_race$coefficients)), c(8), 1)
vector_Black <- replace(rep(0, length(DiD_NPAV_race$coefficients)), c(8, 17), 1)
vector_Hispanic <- replace(rep(0, length(DiD_NPAV_race$coefficients)), c(8, 18), 1)
vector_Asian <- replace(rep(0, length(DiD_NPAV_race$coefficients)), c(8, 19), 1)
vector_Other <- replace(rep(0, length(DiD_NPAV_race$coefficients)), c(8, 20), 1)

est_White <- sum(vector_White*DiD_NPAV_race$coefficients)
est_Black <- sum(vector_Black*DiD_NPAV_race$coefficients)
est_Hispanic <- sum(vector_Hispanic*DiD_NPAV_race$coefficients)
est_Asian <- sum(vector_Asian*DiD_NPAV_race$coefficients)
est_Other <- sum(vector_Other*DiD_NPAV_race$coefficients)

se_White <- sqrt(diag(t(vector_White) %*% vcov_race %*% vector_White))
se_Black <- sqrt(diag(t(vector_Black) %*% vcov_race %*% vector_Black))
se_Hispanic <- sqrt(diag(t(vector_Hispanic) %*% vcov_race %*% vector_Hispanic))
se_Asian <- sqrt(diag(t(vector_Asian) %*% vcov_race %*% vector_Asian))
se_Other <- sqrt(diag(t(vector_Other) %*% vcov_race %*% vector_Other))

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
results_DiD_NPAV_race <-
  data.frame(
    group = c("White", "Black", "Hispanic", "Asian", "Other"),
    estimate = c(est_White, est_Black, est_Hispanic, est_Asian, est_Other),
    se = c(se_White, se_Black, se_Hispanic, se_Asian, se_Other)
  ) %>%
  mutate(
    group = factor(group, levels = c("White", "Black", "Hispanic", "Asian", "Other")),
    lower95 = estimate + qnorm(0.025)*se,
    upper95 = estimate + qnorm(0.975)*se
  )

#*************************
# Education
#*************************

# Heterogeneous Effects by Neighborhood Education Attainment
DiD_NPAV_educ <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    Perc_bachelor_or_more + Perc_bachelor_or_more*UVBM + Perc_bachelor_or_more*Pri2020 + Perc_bachelor_or_more*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Neighborhood Education Attainment
vcov_educ <- vcovCL(DiD_NPAV_educ, cluster = ~ Dist)

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
data_educ <- data.frame(Perc_bachelor_or_more = c(0:100)) %>%
  mutate(
    estimate = DiD_NPAV_educ$coefficients[5]+Perc_bachelor_or_more*DiD_NPAV_educ$coefficients[8],
    lower95 = estimate + qnorm(0.025)*sqrt(vcov_educ[5,5] + Perc_bachelor_or_more^2*vcov_educ[8,8] + 2*Perc_bachelor_or_more*vcov_educ[5,8]),
    upper95 = estimate + qnorm(0.975)*sqrt(vcov_educ[5,5] + Perc_bachelor_or_more^2*vcov_educ[8,8] + 2*Perc_bachelor_or_more*vcov_educ[5,8])
  )

#*************************
# Income
#*************************

# Heterogeneous Effects by Median Neighborhood Income
DiD_NPAV_inc <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 +
    median_hh_income + median_hh_income*UVBM + median_hh_income*Pri2020 + median_hh_income*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)

# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Median Neighborhood Income
vcov_inc <- vcovCL(DiD_NPAV_inc, cluster = ~ Dist)

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
data_inc <- data.frame(median_hh_income = seq(10000, 250000, 1000)) %>%
  mutate(
    estimate = DiD_NPAV_inc$coefficients[5]+median_hh_income*DiD_NPAV_inc$coefficients[8],
    lower95 = estimate + qnorm(0.025)*sqrt(vcov_inc[5,5] + median_hh_income^2*vcov_inc[8,8] + 2*median_hh_income*vcov_inc[5,8]),
    upper95 = estimate + qnorm(0.975)*sqrt(vcov_inc[5,5] + median_hh_income^2*vcov_inc[8,8] + 2*median_hh_income*vcov_inc[5,8])
  )

#*************************
# House Value
#*************************

# Heterogeneous Effects by Median Neighborhood House Value
DiD_NPAV_value <- lm(
  100*Turnout ~ UVBM + Pri2020 + UVBM*Pri2020 + median_value +
    median_value*UVBM + median_value*Pri2020 + median_value*UVBM*Pri2020,
  data = VR_VoteCal_LA_long %>% filter(IsPermanentVBM == "Non-Perm. VBM")
)
# Obtain Clustered Standard Errors Associated with Heterogeneous Effects by Median Neighborhood House Value
vcov_value <- vcovCL(DiD_NPAV_value, cluster = ~ Dist)

# Collect Point Estimates and Clustered Standard Errors in a Dataframe
data_value <- data.frame(median_value = seq(200000, 1000000, 10000)) %>%
  mutate(
    estimate = DiD_NPAV_value$coefficients[5]+median_value*DiD_NPAV_value$coefficients[8],
    lower95 = estimate + qnorm(0.025)*sqrt(vcov_value[5,5] + median_value^2*vcov_value[8,8] + 2*median_value*vcov_value[5,8]),
    upper95 = estimate + qnorm(0.975)*sqrt(vcov_value[5,5] + median_value^2*vcov_value[8,8] + 2*median_value*vcov_value[5,8])
  )

#*******************************************************************************
# Figure 4
#   Difference-in-Differences Estimates by Demographic and Socioeconomic Characteristics
#*******************************************************************************

# Top Left Panel (Age)
Figure_DiD_NPAV_age <- ggplot(results_DiD_NPAV_age, aes(x = group, y = estimate)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower95, ymax = upper95), size = 1) +
  scale_y_continuous(limits = c(-0.5, 7.5)) +
  labs(title = "",
       x = "Age",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Center Left Panel (Gender)
Figure_DiD_NPAV_gender <- ggplot(results_DiD_NPAV_gender, aes(x = group, y = estimate)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower95, ymax = upper95), size = 1) +
  scale_y_continuous(limits = c(-0.5, 7.5)) +
  labs(title = "",
       x = "Gender",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Bottom Left Panel (Race/Ethnicity)
Figure_DiD_NPAV_race <- ggplot(results_DiD_NPAV_race, aes(x = group, y = estimate)) +
  geom_point(color = "blue", size = 2) +
  geom_linerange(aes(ymin = lower95, ymax = upper95), size = 1) +
  scale_y_continuous(limits = c(-0.5, 7.5)) +
  labs(title = "",
       x = "Race/Ethnicity",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Top Right Panel (Neighborhood Education Attainment)
Figure_DiD_NPAV_educ <- ggplot(data_educ, aes(x = Perc_bachelor_or_more, y = estimate)) +
  geom_ribbon(aes(ymin = lower95, ymax = upper95), fill = "grey70") +
  geom_line() +
  scale_y_continuous(limits = c(-1.5, 11.6)) +
  labs(title = "",
       x = "ACS Percent w. Bachelor's Degree",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Center Right Panel (Median Neighborhood Income)
Figure_DiD_NPAV_inc <- ggplot(data_inc, aes(x = median_hh_income/1000, y = estimate)) +
  geom_ribbon(aes(ymin = lower95, ymax = upper95), fill = "grey70") +
  geom_line() +
  scale_x_continuous(labels = function(a) paste0(scales::number(a), "k")) +
  scale_y_continuous(limits = c(-1.5, 11.6)) +
  labs(title = "",
       x = "ACS Median HH Income",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Bottom Right Panel (Median Neighborhood House Value)
Figure_DiD_NPAV_value <- ggplot(data_value, aes(x = median_value/1000, y = estimate)) +
  geom_ribbon(aes(ymin = lower95, ymax = upper95), fill = "grey70") +
  geom_line() +
  scale_x_continuous(labels = function(a) paste0(scales::number(a), "k")) +
  scale_y_continuous(limits = c(-1.5, 11.6)) +
  labs(title = "",
       x = "ACS Median Home Valuation",
       y = "Diff-in-Diff Estimate") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Full Figure
Figure_DiD_demo_socioecon <-
  Figure_DiD_NPAV_age + Figure_DiD_NPAV_educ +
  Figure_DiD_NPAV_gender + Figure_DiD_NPAV_inc +
  Figure_DiD_NPAV_race + Figure_DiD_NPAV_value +
  plot_layout(ncol = 2)

# Save Figure
ggsave("./Replication Package/Figure 4.pdf", Figure_DiD_demo_socioecon,
       width = 8, height = 8, units = "in", dpi = 600)

#*******************************************************************************
# Difference-in-Differences Analysis with Covariate-Specific Trends ala Callaway and Sant'Anna (2021)
#   Results Reported in Table 1
#*******************************************************************************

# Non-Permanent VBM Voters
# Covariates: Demographics
DiD_NPAV_CS_demo <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other,
  data = VR_VoteCal_LA_long %>%
    filter(IsPermanentVBM == "Non-Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other)),
  clustervars = "Dist"
)

# Covariates: Demographics and Median Neighborhood House Value
DiD_NPAV_CS_demo_housing <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other + median_value,
  data = VR_VoteCal_LA_long %>%
    filter(IsPermanentVBM == "Non-Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other), !is.na(median_value)),
  clustervars = "Dist"
)

# Covariates: Demographics, Median Neighborhood House Value, Neighborhood Education Attainment, and Median Neighborhood Income
DiD_NPAV_CS_demo_socioecon <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other + Perc_bachelor_or_more + median_hh_income + median_value,
  data = VR_VoteCal_LA_long  %>%
    filter(IsPermanentVBM == "Non-Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other),
           !is.na(Perc_bachelor_or_more), !is.na(median_hh_income), !is.na(median_value)),
  clustervars = "Dist"
)

# Permanent VBM Voters
# Covariates: Demographics
DiD_PAV_CS_demo <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other,
  data = VR_VoteCal_LA_long %>%
    filter(IsPermanentVBM == "Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other)),
  clustervars = "Dist"
)

# Covariates: Demographics and Median Neighborhood House Value
DiD_PAV_CS_demo_housing <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other + median_value,
  data = VR_VoteCal_LA_long  %>%
    filter(IsPermanentVBM == "Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other), !is.na(median_value)),
  clustervars = "Dist"
)

# Covariates: Demographics, Median Neighborhood House Value, Neighborhood Education Attainment, and Median Neighborhood Income
DiD_PAV_CS_demo_socioecon <- att_gt(
  yname = "Turnout", tname = "Period", idname = "Row_No", gname = "Group",
  xformla = ~ Age_30_to_44 + Age_45_to_64 + Age_65_or_older + Female + Black + Hispanic + Asian + Other + Perc_bachelor_or_more + median_hh_income + median_value,
  data = VR_VoteCal_LA_long  %>%
    filter(IsPermanentVBM == "Perm. VBM", !is.na(Age_30_to_44), !is.na(Age_45_to_64), !is.na(Age_65_or_older),
           !is.na(Female), !is.na(Black), !is.na(Hispanic), !is.na(Asian), !is.na(Other),
           !is.na(Perc_bachelor_or_more), !is.na(median_hh_income), !is.na(median_value)),
  clustervars = "Dist"
)

# Save the results for future reference (to avoid re-running the analysis)
# save(DiD_NPAV_CS_demo, DiD_NPAV_CS_demo_housing, DiD_NPAV_CS_demo_socioecon,
#      DiD_PAV_CS_demo, DiD_PAV_CS_demo_housing, DiD_PAV_CS_demo_socioecon,
#      file = "./Replication Package/DiD_CS.RData")

#*******************************************************************************
# Table 1
#   Difference-in-Differences Estimates with Covariate-Specific Trends ala Callaway and Sant'Anna (2021)
#*******************************************************************************

# Full Table
Table1 <- data.frame(
  Voter_Type = c("Non-Perm. VBM", "Non-Perm. VBM", "Non-Perm. VBM", "Perm. VBM", "Perm. VBM", "Perm. VBM"),
  Estimate = c(DiD_NPAV_CS_demo$att, DiD_NPAV_CS_demo_housing$att, DiD_NPAV_CS_demo_socioecon$att,
               DiD_PAV_CS_demo$att, DiD_PAV_CS_demo_housing$att, DiD_PAV_CS_demo_socioecon$att),
  Std_Error = c(DiD_NPAV_CS_demo$se, DiD_NPAV_CS_demo_housing$se, DiD_NPAV_CS_demo_socioecon$se,
                DiD_PAV_CS_demo$se, DiD_PAV_CS_demo_housing$se, DiD_PAV_CS_demo_socioecon$se),
  Age = c("Included", "Included", "Included", "Included", "Included", "Included"),
  Gender = c("Included", "Included", "Included", "Included", "Included", "Included"),
  Race_Ethnicity = c("Included", "Included", "Included", "Included", "Included", "Included"),
  ACS_Median_House_Value = c("Excluded", "Included", "Included", "Excluded", "Included", "Included"),
  ACS_Education_Attainment = c("Excluded", "Excluded", "Included", "Excluded", "Excluded", "Included"),
  ACS_Median_HH_Income = c("Excluded", "Excluded", "Included", "Excluded", "Excluded", "Included")
) %>%
  mutate(Estimate = round(100*Estimate, 1), Std_Error = round(100*Std_Error, 1)) %>%
  t() %>%
  as.data.frame() %>%
  rename(NPAV_Model1 = V1, NPAV_Model2 = V2, NPAV_Model3 = V3,
         PAV_Model1 = V4, PAV_Model2 = V5, PAV_Model3 = V6)

# Save Table
write.csv(Table1, file = "./Replication Package/Table 1.csv", row.names = TRUE, na = '')
